# *********************************************************************** #
#                         ** Statistical Learning **                      #
#                   ** Lab 11 - Basic Classifiers 1/2 **                  #
# *********************************************************************** #
# Daily-sports ------------------------------------------------------------

# Consider the problem of distinguishing human activities performed while
# wearing inertial and magnetic sensor units.
# The dataset is publicly available at the UCI Machine Learning Repository 
# where it is used to classify 19 activities performed by 8 people that 
# wear sensor units on the chest, arms and legs.
# The 5-min signals were acquired at 25 Hz sampling frequency and divided 
# into 5-sec segments so that 480(=60x8) signal segments are obtained for 
# each activity. 
#
# url: https://archive.ics.uci.edu/ml/datasets/Daily+and+Sports+Activities
#
# For this Lab, we will take the results on just 4 activities (walking, 
# stepper, cross trainer, jumping) performed by a single person (#1) and as 
# covariates the measurements taken by all the 9 sensors (x,y,z 
# accelerometers, x,y,z gyroscopes, x,y,z magnetometers) on each of the 5 
# units on torso (T), right arm (RA), left arm (LA), right leg (RL), 
# left leg (LL) for a total of 45 features.

load("daily-sport.RData")
str(dailysport)
## 'data.frame':    30000 obs. of  46 variables:
##  $ id      : Factor w/ 4 levels "crosstr","jumping",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ T-xAcc  : num  8.72 8.62 8.93 9.07 9.05 ...
##  $ T-yAcc  : num  2.14 2.05 2.05 2.08 1.98 ...
##  $ T-zAcc  : num  3.83 3.45 3.28 3.23 3.41 ...
##  $ T-xGyro : num  0.294 0.23 0.209 0.197 0.238 ...
##  $ T-yGyro : num  -0.252 -0.26 -0.344 -0.241 -0.098 ...
##  $ T-zGyro : num  -0.1506 -0.13245 -0.10109 -0.06141 -0.00907 ...
##  $ T-xMag  : num  -0.577 -0.586 -0.596 -0.605 -0.61 ...
##  $ T-yMag  : num  0.1086 0.0974 0.0896 0.0825 0.075 ...
##  $ T-zMag  : num  -0.697 -0.69 -0.684 -0.675 -0.672 ...
##  $ RA-xAcc : num  8.59 8.5 9.09 9.36 9.34 ...
##  $ RA-yAcc : num  3.51 3.77 3.8 3.77 3.94 ...
##  $ RA-zAcc : num  1.58 1.64 1.62 1.68 1.83 ...
##  $ RA-xGyro: num  0.492 0.252 0.168 0.25 0.269 ...
##  $ RA-yGyro: num  -0.224 -0.381 -0.421 -0.352 -0.276 ...
##  $ RA-zGyro: num  0.532 0.65 0.706 0.597 0.373 ...
##  $ RA-xMag : num  -0.532 -0.55 -0.574 -0.594 -0.614 ...
##  $ RA-yMag : num  -0.476 -0.472 -0.461 -0.452 -0.443 ...
##  $ RA-zMag : num  -0.594 -0.582 -0.571 -0.559 -0.545 ...
##  $ LA-xAcc : num  9.22 8.47 8.49 8.43 8.08 ...
##  $ LA-yAcc : num  -2.77 -2.76 -3.02 -3.07 -3.59 ...
##  $ LA-zAcc : num  3.06 2.72 2.51 2.54 3.06 ...
##  $ LA-xGyro: num  0.301 0.0294 -0.0944 -0.6035 -1.0688 ...
##  $ LA-yGyro: num  -0.0141 -0.0514 -0.0233 0.1012 0.1644 ...
##  $ LA-zGyro: num  -0.488 -0.379 -0.342 -0.325 -0.265 ...
##  $ LA-xMag : num  -0.484 -0.492 -0.503 -0.511 -0.52 ...
##  $ LA-yMag : num  0.727 0.717 0.712 0.709 0.711 ...
##  $ LA-zMag : num  -0.256 -0.262 -0.261 -0.251 -0.229 ...
##  $ RL-xAcc : num  -11.65 -10.9 -10.09 -8.85 -8.46 ...
##  $ RL-yAcc : num  0.112 -1.163 -1.985 -0.721 0.802 ...
##  $ RL-zAcc : num  0.464 1.118 1.285 1.398 1.577 ...
##  $ RL-xGyro: num  -1.69 -1.268 -0.476 -0.305 -0.278 ...
##  $ RL-yGyro: num  0.411 0.437 0.148 0.195 0.235 ...
##  $ RL-zGyro: num  1.55 1.68 1.57 1.55 1.36 ...
##  $ RL-xMag : num  0.794 0.763 0.727 0.689 0.646 ...
##  $ RL-yMag : num  -0.427 -0.485 -0.54 -0.587 -0.636 ...
##  $ RL-zMag : num  0.192 0.177 0.171 0.166 0.164 ...
##  $ LL-xAcc : num  -9.74 -9.53 -9.97 -9.66 -9.77 ...
##  $ LL-yAcc : num  -0.434 -0.495 1.045 -0.436 -0.523 ...
##  $ LL-zAcc : num  -1.46 -1.55 -1.27 -1.04 -1.66 ...
##  $ LL-xGyro: num  -0.291 -0.348 -0.417 -0.234 -0.344 ...
##  $ LL-yGyro: num  0.0618 0.0619 0.0789 -0.0764 0.0258 ...
##  $ LL-zGyro: num  0.32 0.291 0.297 0.478 0.407 ...
##  $ LL-xMag : num  0.8 0.805 0.811 0.817 0.823 ...
##  $ LL-yMag : num  0.437 0.43 0.423 0.412 0.397 ...
##  $ LL-zMag : num  -0.166 -0.159 -0.149 -0.144 -0.143 ...
summary(dailysport)
##        id           T-xAcc            T-yAcc             T-zAcc      
##  crosstr:7500   Min.   :-11.526   Min.   :-14.0190   Min.   :-8.363  
##  jumping:7500   1st Qu.:  6.584   1st Qu.: -0.3961   1st Qu.: 1.420  
##  stepper:7500   Median :  8.751   Median :  0.3336   Median : 2.848  
##  walking:7500   Mean   :  9.227   Mean   :  0.3792   Mean   : 3.029  
##                 3rd Qu.: 10.936   3rd Qu.:  1.3094   3rd Qu.: 3.972  
##                 Max.   : 70.835   Max.   : 14.0120   Max.   :33.093  
##     T-xGyro             T-yGyro            T-zGyro          
##  Min.   :-4.696900   Min.   :-9.85770   Min.   :-2.1615000  
##  1st Qu.:-0.330675   1st Qu.:-0.26165   1st Qu.:-0.1770200  
##  Median : 0.016853   Median : 0.02464   Median :-0.0009475  
##  Mean   : 0.001798   Mean   : 0.01580   Mean   : 0.0014212  
##  3rd Qu.: 0.359052   3rd Qu.: 0.33622   3rd Qu.: 0.1753525  
##  Max.   : 4.530300   Max.   : 5.59380   Max.   : 1.9199000  
##      T-xMag            T-yMag             T-zMag           RA-xAcc        
##  Min.   :-0.9246   Min.   :-0.58331   Min.   :-0.8606   Min.   :-25.2760  
##  1st Qu.:-0.7513   1st Qu.:-0.15989   1st Qu.:-0.6281   1st Qu.: -0.1473  
##  Median :-0.6952   Median :-0.06689   Median :-0.4968   Median :  2.7679  
##  Mean   :-0.7000   Mean   : 0.02185   Mean   :-0.4154   Mean   :  3.1426  
##  3rd Qu.:-0.6171   3rd Qu.: 0.25817   3rd Qu.:-0.2799   3rd Qu.:  7.7249  
##  Max.   :-0.4142   Max.   : 0.56106   Max.   : 0.2717   Max.   : 21.5890  
##     RA-yAcc           RA-zAcc           RA-xGyro       
##  Min.   :-10.587   Min.   :-16.557   Min.   :-7.75480  
##  1st Qu.:  2.839   1st Qu.:  1.847   1st Qu.:-0.43318  
##  Median :  4.864   Median :  3.967   Median : 0.01838  
##  Mean   :  6.086   Mean   :  4.166   Mean   : 0.01600  
##  3rd Qu.:  7.529   3rd Qu.:  6.468   3rd Qu.: 0.47524  
##  Max.   : 51.797   Max.   : 39.010   Max.   : 9.55990  
##     RA-yGyro            RA-zGyro            RA-xMag        
##  Min.   :-6.432800   Min.   :-4.343000   Min.   :-0.94995  
##  1st Qu.:-0.365402   1st Qu.:-0.654562   1st Qu.:-0.46215  
##  Median :-0.014138   Median :-0.017570   Median :-0.22478  
##  Mean   :-0.009334   Mean   : 0.002844   Mean   :-0.25362  
##  3rd Qu.: 0.362628   3rd Qu.: 0.635118   3rd Qu.:-0.01952  
##  Max.   : 4.853700   Max.   : 5.177800   Max.   : 0.93707  
##     RA-yMag           RA-zMag           LA-xAcc            LA-yAcc       
##  Min.   :-1.1059   Min.   :-1.1374   Min.   :-31.6400   Min.   :-57.939  
##  1st Qu.:-0.7037   1st Qu.:-0.6716   1st Qu.:  0.6757   1st Qu.: -6.833  
##  Median :-0.5470   Median :-0.5559   Median :  4.3583   Median : -4.362  
##  Mean   :-0.4846   Mean   :-0.5029   Mean   :  4.0236   Mean   : -5.387  
##  3rd Qu.:-0.2958   3rd Qu.:-0.3964   3rd Qu.:  8.1350   3rd Qu.: -2.387  
##  Max.   : 0.3295   Max.   : 0.3614   Max.   : 50.9240   Max.   : 30.246  
##     LA-zAcc            LA-xGyro            LA-yGyro        
##  Min.   :-23.7440   Min.   :-8.833200   Min.   :-8.381800  
##  1st Qu.:  0.7575   1st Qu.:-0.469542   1st Qu.:-0.382228  
##  Median :  3.2938   Median :-0.004280   Median :-0.024567  
##  Mean   :  2.9418   Mean   :-0.007128   Mean   :-0.004787  
##  3rd Qu.:  6.2104   3rd Qu.: 0.426720   3rd Qu.: 0.368470  
##  Max.   : 24.6540   Max.   :18.809000   Max.   : 4.358900  
##     LA-zGyro             LA-xMag            LA-yMag       
##  Min.   :-12.935000   Min.   :-0.93608   Min.   :-0.9267  
##  1st Qu.: -0.624395   1st Qu.:-0.49630   1st Qu.: 0.3048  
##  Median :  0.027278   Median :-0.03208   Median : 0.4356  
##  Mean   : -0.008746   Mean   :-0.14015   Mean   : 0.3854  
##  3rd Qu.:  0.622652   3rd Qu.: 0.21392   3rd Qu.: 0.5705  
##  Max.   :  5.679100   Max.   : 0.89720   Max.   : 0.8499  
##     LA-zMag           RL-xAcc           RL-yAcc           RL-zAcc        
##  Min.   :-1.1028   Min.   :-89.704   Min.   :-52.727   Min.   :-47.9250  
##  1st Qu.:-0.6849   1st Qu.:-11.917   1st Qu.: -1.034   1st Qu.: -1.1442  
##  Median :-0.4257   Median : -9.704   Median :  1.115   Median : -0.1701  
##  Mean   :-0.2640   Mean   :-10.051   Mean   :  1.084   Mean   : -0.2483  
##  3rd Qu.: 0.1127   3rd Qu.: -7.624   3rd Qu.:  3.362   3rd Qu.:  0.8341  
##  Max.   : 0.7589   Max.   :  3.008   Max.   : 80.427   Max.   : 17.0360  
##     RL-xGyro           RL-yGyro            RL-zGyro        
##  Min.   :-5.14820   Min.   :-2.081100   Min.   :-3.185800  
##  1st Qu.:-0.52341   1st Qu.:-0.198860   1st Qu.:-1.127500  
##  Median : 0.04027   Median : 0.008418   Median :-0.064018  
##  Mean   : 0.01691   Mean   : 0.012355   Mean   :-0.003762  
##  3rd Qu.: 0.54979   3rd Qu.: 0.219955   3rd Qu.: 1.074200  
##  Max.   : 6.01390   Max.   : 3.311100   Max.   : 3.665900  
##     RL-xMag          RL-yMag           RL-zMag            LL-xAcc       
##  Min.   :0.3556   Min.   :-0.7856   Min.   :-0.62804   Min.   :-93.940  
##  1st Qu.:0.4882   1st Qu.:-0.2595   1st Qu.:-0.32134   1st Qu.:-11.757  
##  Median :0.6394   Median :-0.1207   Median :-0.12695   Median : -9.556  
##  Mean   :0.6510   Mean   :-0.1197   Mean   :-0.01181   Mean   : -9.989  
##  3rd Qu.:0.8168   3rd Qu.:-0.0391   3rd Qu.: 0.31716   3rd Qu.: -7.699  
##  Max.   :1.0715   Max.   : 0.7485   Max.   : 0.57962   Max.   :  3.592  
##     LL-yAcc            LL-zAcc            LL-xGyro       
##  Min.   :-95.8980   Min.   :-27.1510   Min.   :-7.15920  
##  1st Qu.: -4.1598   1st Qu.: -1.4636   1st Qu.:-0.56632  
##  Median : -1.7903   Median : -0.3309   Median :-0.06929  
##  Mean   : -1.7952   Mean   : -0.4361   Mean   :-0.03791  
##  3rd Qu.:  0.4201   3rd Qu.:  0.7115   3rd Qu.: 0.48713  
##  Max.   : 49.4040   Max.   : 33.0620   Max.   :10.38100  
##     LL-yGyro            LL-zGyro            LL-xMag      
##  Min.   :-3.605000   Min.   :-4.115000   Min.   :0.2951  
##  1st Qu.:-0.227410   1st Qu.:-1.116200   1st Qu.:0.4771  
##  Median : 0.004304   Median : 0.018875   Median :0.6287  
##  Mean   : 0.004683   Mean   :-0.006432   Mean   :0.6244  
##  3rd Qu.: 0.231900   3rd Qu.: 1.148025   3rd Qu.:0.7691  
##  Max.   : 4.207600   Max.   : 3.526500   Max.   :1.0377  
##     LL-yMag           LL-zMag        
##  Min.   :-0.7177   Min.   :-0.52303  
##  1st Qu.: 0.1047   1st Qu.:-0.27879  
##  Median : 0.2789   Median : 0.01363  
##  Mean   : 0.2626   Mean   :-0.02575  
##  3rd Qu.: 0.4749   3rd Qu.: 0.14591  
##  Max.   : 1.0249   Max.   : 0.62156
# 30000 obs. of  46 variables...can apply any method we like!

# Train-Test split --------------------------------------------------------

# Pretty useful meta-package
# url: http://topepo.github.io/caret/index.html
# install.packages("caret")
suppressMessages(require(caret, quietly = T))
library(help = caret)
?createDataPartition   # to get balanced train-test split

# Train-Test split
set.seed(13413) # set seed for reproducibility
idx.tr = createDataPartition(y = dailysport$id, p = .65, list = FALSE)
head(idx.tr)
##      Resample1
## [1,]         1
## [2,]         2
## [3,]         3
## [4,]         4
## [5,]         6
## [6,]         7
daily.tr = dailysport[ idx.tr, ]
daily.te = dailysport[-idx.tr, ]

# Take a look (nicely balanced)
table(dailysport$id)
## 
## crosstr jumping stepper walking 
##    7500    7500    7500    7500
table(daily.tr$id)
## 
## crosstr jumping stepper walking 
##    4875    4875    4875    4875
table(daily.te$id)
## 
## crosstr jumping stepper walking 
##    2625    2625    2625    2625
# To avoid problems with the non-standard variable names, let's fix them
names(daily.tr) <- make.names(names(daily.tr))
names(daily.te) <- make.names(names(daily.te))

# LDA ---------------------------------------------------------------------

library(MASS)
?lda

# Fit the LDA model
mod1 = lda(id ~ ., data = daily.tr)
class(mod1)
## [1] "lda"
names(mod1)
##  [1] "prior"   "counts"  "means"   "scaling" "lev"     "svd"     "N"      
##  [8] "call"    "terms"   "xlevels"
# Prediction error on test
pred1 = predict(mod1, daily.te[,-1])
names(pred1)
## [1] "class"     "posterior" "x"
# Take a look
head(pred1$class)        # predicted class
## [1] walking walking walking walking walking walking
## Levels: crosstr jumping stepper walking
head(pred1$posterior)    # per class posterior prob
##         crosstr      jumping      stepper walking
## 5  4.974649e-46 5.493334e-77 6.921924e-66       1
## 8  3.102070e-41 8.662532e-75 1.464732e-66       1
## 15 1.629424e-33 8.300108e-90 1.386280e-50       1
## 16 1.853057e-34 2.834890e-90 1.016351e-51       1
## 17 1.902214e-36 8.638467e-95 8.031013e-50       1
## 24 1.085715e-38 1.166963e-92 8.013088e-48       1
# Missclassification error on test...quite an easy problem! 
mean(pred1$class != daily.te$id)*100
## [1] 0
# Or, quick and dirty confusion matrix
table(pred1$class, daily.te$id)  #...WOW...*this* never happens!
##          
##           crosstr jumping stepper walking
##   crosstr    2625       0       0       0
##   jumping       0    2625       0       0
##   stepper       0       0    2625       0
##   walking       0       0       0    2625
# Naive Bayes (parametric) ------------------------------------------------

# install.packages("e1071")
suppressMessages(require(e1071, quietly = T))
library(help = e1071)
?naiveBayes    # in practice, this is a diagonal LDA!

# Fit the model
mod2 = naiveBayes(id ~ ., data = daily.tr, type = "raw")
class(mod2)
## [1] "naiveBayes"
names(mod2)
## [1] "apriori"   "tables"    "levels"    "isnumeric" "call"
summary(mod2)
##           Length Class  Mode     
## apriori    4     table  numeric  
## tables    45     -none- list     
## levels     4     -none- character
## isnumeric 45     -none- logical  
## call       5     -none- call
# Predict on the test set
pred2 = predict(mod2, daily.te[,-1])   # strangely takes a long time
head(pred2)
## [1] walking walking walking walking walking walking
## Levels: crosstr jumping stepper walking
# Missclassification error on test
mean(pred2 !=  daily.te$id)*100
## [1] 0
# Once again...
?confusionMatrix
## Help on topic 'confusionMatrix' was found in the following
## packages:
## 
##   Package               Library
##   caret                 /Library/Frameworks/R.framework/Versions/3.5/Resources/library
##   ModelMetrics          /Library/Frameworks/R.framework/Versions/3.5/Resources/library
## 
## 
## Using the first match ...
confusionMatrix(pred1$class, daily.te$id)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction crosstr jumping stepper walking
##    crosstr    2625       0       0       0
##    jumping       0    2625       0       0
##    stepper       0       0    2625       0
##    walking       0       0       0    2625
## 
## Overall Statistics
##                                      
##                Accuracy : 1          
##                  95% CI : (0.9996, 1)
##     No Information Rate : 0.25       
##     P-Value [Acc > NIR] : < 2.2e-16  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
## 
## Statistics by Class:
## 
##                      Class: crosstr Class: jumping Class: stepper
## Sensitivity                    1.00           1.00           1.00
## Specificity                    1.00           1.00           1.00
## Pos Pred Value                 1.00           1.00           1.00
## Neg Pred Value                 1.00           1.00           1.00
## Prevalence                     0.25           0.25           0.25
## Detection Rate                 0.25           0.25           0.25
## Detection Prevalence           0.25           0.25           0.25
## Balanced Accuracy              1.00           1.00           1.00
##                      Class: walking
## Sensitivity                    1.00
## Specificity                    1.00
## Pos Pred Value                 1.00
## Neg Pred Value                 1.00
## Prevalence                     0.25
## Detection Rate                 0.25
## Detection Prevalence           0.25
## Balanced Accuracy              1.00
# Multiclass Logistic -----------------------------------------------------

# The previous methods do not give "natively" any measure of variable 
# importance. Lets try other "classics"...

suppressWarnings(require(viridis, quietly = T))
suppressWarnings(require(nnet, quietly = T))   # multiclass-logistic
?multinom

# Fit the model
mod3 = multinom(id ~ ., data = daily.tr, maxit = 500)
## # weights:  188 (138 variable)
## initial  value 27032.740042 
## iter  10 value 5824.919407
## iter  20 value 3634.465904
## iter  30 value 3045.613043
## iter  40 value 2590.899781
## iter  50 value 2198.433191
## iter  60 value 1650.849970
## iter  70 value 1197.284633
## iter  80 value 617.136851
## iter  90 value 224.817654
## iter 100 value 54.114870
## iter 110 value 7.056134
## iter 120 value 1.043503
## iter 130 value 0.236351
## iter 140 value 0.085437
## iter 150 value 0.024051
## iter 160 value 0.014635
## iter 170 value 0.005450
## iter 180 value 0.002594
## iter 190 value 0.001905
## iter 200 value 0.000307
## iter 210 value 0.000157
## iter 220 value 0.000100
## iter 220 value 0.000100
## iter 220 value 0.000070
## final  value 0.000070 
## converged
class(mod3)
## [1] "multinom" "nnet"
names(mod3)
##  [1] "n"             "nunits"        "nconn"         "conn"         
##  [5] "nsunits"       "decay"         "entropy"       "softmax"      
##  [9] "censored"      "value"         "wts"           "convergence"  
## [13] "fitted.values" "residuals"     "lev"           "call"         
## [17] "terms"         "weights"       "deviance"      "rank"         
## [21] "lab"           "coefnames"     "vcoefnames"    "xlevels"      
## [25] "edf"           "AIC"
# Take a look at the coeff's and their standard errors
# Each row of values corresponds to a profile of that specific level of the
# response against the baseline level of id = "crosstr.
summary(mod3)
## Call:
## multinom(formula = id ~ ., data = daily.tr, maxit = 500)
## 
## Coefficients:
##         (Intercept)     T.xAcc    T.yAcc    T.zAcc   T.xGyro   T.yGyro
## jumping   -119.9712 0.09431812  2.596169 0.9003049  5.871740  8.551042
## stepper   -334.6033 0.94096342  1.068385 3.3374218  1.406063 -6.471650
## walking   -191.1198 0.91520474 -1.436747 2.2903229 -5.018481 10.063600
##           T.zGyro   T.xMag    T.yMag   T.zMag    RA.xAcc   RA.yAcc
## jumping -6.203039 265.9722 274.30636 277.0794 -0.8162727  1.354955
## stepper  4.614142 199.8532 172.69401 173.0864 -1.2170213  1.432096
## walking -5.536947 108.7806  63.18525 134.0281 -0.8021364 -1.229246
##           RA.zAcc   RA.xGyro  RA.yGyro   RA.zGyro   RA.xMag    RA.yMag
## jumping 1.9346590 -2.2608091 9.1695477 -3.4890735 23.428284 -40.974227
## stepper 0.9765970  0.1589911 0.8730202 -2.8143638 63.262345  -4.949428
## walking 0.9433189 -1.8875501 3.2566525 -0.6451893 -9.306769  68.497576
##           RA.zMag    LA.xAcc    LA.yAcc   LA.zAcc  LA.xGyro  LA.yGyro
## jumping -14.92698  1.8402941  0.1670749 2.0505896 -5.586349 -5.976765
## stepper -44.29088  1.4324164  0.6013660 1.8751510 -9.582874  1.666756
## walking  32.98697 -0.9940291 -0.7623836 0.7044377 -3.930608 -5.075649
##            LA.zGyro     LA.xMag   LA.yMag   LA.zMag    RL.xAcc     RL.yAcc
## jumping -2.37033834  12.1671673 -20.02364  18.89019 -0.9483074  0.03581965
## stepper -2.78915209   0.6748684 -33.59423  90.55630 -1.1571255  0.46241635
## walking  0.01703841 -95.8069393  72.16841 -32.63456 -0.7089812 -0.27252263
##            RL.zAcc   RL.xGyro   RL.yGyro  RL.zGyro  RL.xMag   RL.yMag
## jumping -0.4889773  -7.992962  -7.146648  3.014234 322.5412 -41.28186
## stepper -2.7582201 -11.049424 -21.793877 -7.357621 442.5157 -61.11306
## walking -0.6156858  -6.891270 -20.932980 -1.665089 220.0202 -62.49129
##           RL.zMag    LL.xAcc    LL.yAcc   LL.zAcc   LL.xGyro LL.yGyro
## jumping  63.82113  0.4139987 -0.4894601 0.7059678 -1.1941200 2.541247
## stepper -42.52960  0.5638191  0.3200834 1.4854076  0.8468322 7.040384
## walking 104.20619 -0.6532616 -0.5306087 0.1616451  2.5619671 6.861792
##          LL.zGyro  LL.xMag   LL.yMag  LL.zMag
## jumping -1.177770 223.9820 107.17714 58.82988
## stepper  6.202517 347.9663  82.49983 65.86803
## walking  3.975023 195.0242  69.36066 47.97993
## 
## Std. Errors:
##         (Intercept)   T.xAcc   T.yAcc   T.zAcc  T.xGyro  T.yGyro  T.zGyro
## jumping   1656.5141 2600.457 1562.688 4005.482 3978.390 4861.310 1492.252
## stepper    603.8159 2860.045 1854.908 3381.876 2821.328 2493.370 1295.352
## walking   1352.2810 2984.089 3502.452 4661.744 1889.845 2027.137 1227.451
##           T.xMag   T.yMag    T.zMag  RA.xAcc  RA.yAcc  RA.zAcc RA.xGyro
## jumping 970.6951 839.4970  791.8451 2777.046 3433.319 2469.455 4334.161
## stepper 716.9538 679.9532 1245.8416 1922.071 2280.410 1469.470 1589.596
## walking 874.8435 782.9178 1462.5308 4420.329 3885.305 4625.852 3742.918
##         RA.yGyro RA.zGyro  RA.xMag   RA.yMag   RA.zMag  LA.xAcc  LA.yAcc
## jumping 3545.846 2522.311 591.7286  811.8736 1141.6400 1514.700 3283.178
## stepper 5176.006 3989.645 763.9948  897.0333  924.5238 3280.305 3665.341
## walking 3138.834 2384.486 698.1611 1149.4382 1382.4660 4403.968 4471.506
##          LA.zAcc LA.xGyro LA.yGyro LA.zGyro   LA.xMag  LA.yMag   LA.zMag
## jumping 2447.472 3467.671 2069.349 3364.105  668.6878 1310.157 1253.7886
## stepper 3337.972 3648.134 2635.278 2447.612  763.9130 1598.675  697.5111
## walking 4372.615 4536.192 2149.171 3485.364 1027.0284 1402.976  820.8999
##          RL.xAcc  RL.yAcc  RL.zAcc RL.xGyro RL.yGyro RL.zGyro  RL.xMag
## jumping 3119.990 1706.880 3954.498 5254.300 1353.893 1984.906 824.4463
## stepper 3950.530 2774.309 2903.605 3513.349 1034.199 3176.799 625.8474
## walking 3295.081 1957.657 3474.412 4872.635 1739.112 2522.482 893.0134
##           RL.yMag   RL.zMag  LL.xAcc  LL.yAcc  LL.zAcc LL.xGyro LL.yGyro
## jumping  476.1609 1254.9092 4166.593 2512.477 3420.162 3912.207  776.001
## stepper 1310.7632  955.4004 3237.273 2413.297 4434.577 4362.800 1637.360
## walking 1268.4705 1016.0911 2977.161 2953.667 4487.123 4656.093 1683.356
##         LL.zGyro  LL.xMag   LL.yMag   LL.zMag
## jumping 2186.317 711.4678  630.7879 1088.5570
## stepper 3013.659 352.1035 1092.6664  746.7053
## walking 2068.405 780.7586 1423.8971  732.8488
## 
## Residual Deviance: 0.0001390481 
## AIC: 276.0001
t(coef(mod3)) # More readable
##                   jumping      stepper       walking
## (Intercept) -119.97123464 -334.6032874 -191.11983157
## T.xAcc         0.09431812    0.9409634    0.91520474
## T.yAcc         2.59616940    1.0683852   -1.43674703
## T.zAcc         0.90030486    3.3374218    2.29032289
## T.xGyro        5.87173962    1.4060633   -5.01848129
## T.yGyro        8.55104232   -6.4716504   10.06359959
## T.zGyro       -6.20303897    4.6141416   -5.53694719
## T.xMag       265.97215207  199.8531503  108.78055104
## T.yMag       274.30636109  172.6940136   63.18524617
## T.zMag       277.07939323  173.0864187  134.02805229
## RA.xAcc       -0.81627274   -1.2170213   -0.80213639
## RA.yAcc        1.35495531    1.4320958   -1.22924629
## RA.zAcc        1.93465899    0.9765970    0.94331889
## RA.xGyro      -2.26080905    0.1589911   -1.88755010
## RA.yGyro       9.16954775    0.8730202    3.25665250
## RA.zGyro      -3.48907353   -2.8143638   -0.64518930
## RA.xMag       23.42828376   63.2623453   -9.30676873
## RA.yMag      -40.97422728   -4.9494284   68.49757605
## RA.zMag      -14.92698092  -44.2908849   32.98697149
## LA.xAcc        1.84029408    1.4324164   -0.99402905
## LA.yAcc        0.16707486    0.6013660   -0.76238356
## LA.zAcc        2.05058965    1.8751510    0.70443771
## LA.xGyro      -5.58634853   -9.5828745   -3.93060837
## LA.yGyro      -5.97676519    1.6667561   -5.07564850
## LA.zGyro      -2.37033834   -2.7891521    0.01703841
## LA.xMag       12.16716727    0.6748684  -95.80693933
## LA.yMag      -20.02363776  -33.5942266   72.16841381
## LA.zMag       18.89018920   90.5562984  -32.63455988
## RL.xAcc       -0.94830735   -1.1571255   -0.70898121
## RL.yAcc        0.03581965    0.4624164   -0.27252263
## RL.zAcc       -0.48897733   -2.7582201   -0.61568580
## RL.xGyro      -7.99296241  -11.0494238   -6.89126975
## RL.yGyro      -7.14664837  -21.7938772  -20.93298042
## RL.zGyro       3.01423441   -7.3576212   -1.66508899
## RL.xMag      322.54122169  442.5157163  220.02022439
## RL.yMag      -41.28185710  -61.1130625  -62.49128907
## RL.zMag       63.82112628  -42.5296043  104.20619487
## LL.xAcc        0.41399870    0.5638191   -0.65326156
## LL.yAcc       -0.48946013    0.3200834   -0.53060875
## LL.zAcc        0.70596784    1.4854076    0.16164513
## LL.xGyro      -1.19412004    0.8468322    2.56196715
## LL.yGyro       2.54124721    7.0403842    6.86179205
## LL.zGyro      -1.17777023    6.2025173    3.97502310
## LL.xMag      223.98197539  347.9663421  195.02424127
## LL.yMag      107.17714394   82.4998310   69.36066225
## LL.zMag       58.82988401   65.8680280   47.97992635
# 2-tailed z test
z <- summary(mod3)$coefficients/summary(mod3)$standard.errors
head(z)
##         (Intercept)       T.xAcc        T.yAcc       T.zAcc       T.xGyro
## jumping -0.07242391 3.626983e-05  0.0016613483 0.0002247681  0.0014759087
## stepper -0.55414786 3.290030e-04  0.0005759776 0.0009868551  0.0004983693
## walking -0.14133145 3.066948e-04 -0.0004102118 0.0004913018 -0.0026554988
##              T.yGyro      T.zGyro    T.xMag     T.yMag     T.zMag
## jumping  0.001759000 -0.004156831 0.2740018 0.32675085 0.34991616
## stepper -0.002595543  0.003562075 0.2787532 0.25397928 0.13893132
## walking  0.004964440 -0.004510932 0.1243429 0.08070483 0.09164118
##               RA.xAcc       RA.yAcc      RA.zAcc      RA.xGyro
## jumping -0.0002939356  0.0003946488 0.0007834356 -0.0005216256
## stepper -0.0006331822  0.0006279993 0.0006645915  0.0001000198
## walking -0.0001814653 -0.0003163834 0.0002039233 -0.0005042991
##             RA.yGyro      RA.zGyro     RA.xMag      RA.yMag     RA.zMag
## jumping 0.0025859973 -0.0013832847  0.03959296 -0.050468727 -0.01307503
## stepper 0.0001686668 -0.0007054171  0.08280468 -0.005517553 -0.04790670
## walking 0.0010375358 -0.0002705779 -0.01333040  0.059592224  0.02386096
##               LA.xAcc       LA.yAcc      LA.zAcc      LA.xGyro
## jumping  0.0012149559  5.088816e-05 0.0008378398 -0.0016109798
## stepper  0.0004366717  1.640682e-04 0.0005617635 -0.0026267882
## walking -0.0002257121 -1.704982e-04 0.0001611022 -0.0008664996
##              LA.yGyro      LA.zGyro       LA.xMag     LA.yMag     LA.zMag
## jumping -0.0028882352 -7.045971e-04  0.0181955881 -0.01528339  0.01506649
## stepper  0.0006324782 -1.139540e-03  0.0008834362 -0.02101379  0.12982775
## walking -0.0023616777  4.888560e-06 -0.0932855789  0.05143950 -0.03975461
##               RL.xAcc       RL.yAcc       RL.zAcc     RL.xGyro
## jumping -0.0003039457  2.098545e-05 -0.0001236509 -0.001521223
## stepper -0.0002929038  1.666781e-04 -0.0009499294 -0.003144984
## walking -0.0002151635 -1.392086e-04 -0.0001772057 -0.001414280
##             RL.yGyro      RL.zGyro   RL.xMag     RL.yMag     RL.zMag
## jumping -0.005278592  0.0015185777 0.3912216 -0.08669728  0.05085717
## stepper -0.021073194 -0.0023160490 0.7070665 -0.04662403 -0.04451495
## walking -0.012036595 -0.0006600994 0.2463795 -0.04926507  0.10255596
##               LL.xAcc       LL.yAcc      LL.zAcc      LL.xGyro    LL.yGyro
## jumping  9.936143e-05 -0.0001948118 2.064136e-04 -0.0003052292 0.003274799
## stepper  1.741648e-04  0.0001326332 3.349604e-04  0.0001941029 0.004299838
## walking -2.194243e-04 -0.0001796441 3.602422e-05  0.0005502397 0.004076257
##              LL.zGyro   LL.xMag    LL.yMag    LL.zMag
## jumping -0.0005387006 0.3148168 0.16990995 0.05404392
## stepper  0.0020581353 0.9882503 0.07550322 0.08821154
## walking  0.0019217822 0.2497882 0.04871185 0.06547043
# Drop the intercept and plot the log(|z|)
lz = t(log(abs(z[,-1])))
# Plot
xcol = viridis(nlevels(daily.tr[,1]), alpha = c(1,.7,.5,.3))
plot(c(2,44), range(lz), type = "n",
     xlab = "Variable Index", ylab = "log(|z|)", xlim = c(2,44),
     sub = "(* = significat at 5%)")
xpl <- c(-1, 3*(1:14) + .5, 50)
for (i in 0:7){
  rect(xpl[2*i + 1],  -20, xpl[2*i + 2],  10, 
       col = rgb(0,0,0,.2), border = F)
  text(3*(1:15) - 1, -7, gsub("z","",names(daily.tr[,-1])[3*(1:15)]), 
       cex = .7, pos = 1)
}
abline(h = 0, col = rgb(0,0,0,.3), lty = 1, lwd = 2)
matplot(lz, type = "h", col = xcol, lty = 1, lwd = c(1,3,5,6), 
        add = TRUE)
legend("bottom", rownames(z), col = xcol, lwd = c(1,3,5,6), lty = 1,
       bty = "n", cex = .7, horiz = T)
# The test
p <- (1 - pnorm(abs(z), 0, 1))*2
t(p)
##               jumping   stepper   walking
## (Intercept) 0.9422646 0.5794777 0.8876081
## T.xAcc      0.9999711 0.9997375 0.9997553
## T.yAcc      0.9986744 0.9995404 0.9996727
## T.zAcc      0.9998207 0.9992126 0.9996080
## T.xGyro     0.9988224 0.9996024 0.9978812
## T.yGyro     0.9985965 0.9979291 0.9960390
## T.zGyro     0.9966833 0.9971579 0.9964008
## T.xMag      0.7840833 0.7804342 0.9010438
## T.yMag      0.7438563 0.7995116 0.9356767
## T.zMag      0.7264016 0.8895044 0.9269831
## RA.xAcc     0.9997655 0.9994948 0.9998552
## RA.yAcc     0.9996851 0.9994989 0.9997476
## RA.zAcc     0.9993749 0.9994697 0.9998373
## RA.xGyro    0.9995838 0.9999202 0.9995976
## RA.yGyro    0.9979367 0.9998654 0.9991722
## RA.zGyro    0.9988963 0.9994372 0.9997841
## RA.xMag     0.9684176 0.9340068 0.9893642
## RA.yMag     0.9597489 0.9955977 0.9524804
## RA.zMag     0.9895679 0.9617906 0.9809635
## LA.xAcc     0.9990306 0.9996516 0.9998199
## LA.yAcc     0.9999594 0.9998691 0.9998640
## LA.zAcc     0.9993315 0.9995518 0.9998715
## LA.xGyro    0.9987146 0.9979041 0.9993086
## LA.yGyro    0.9976955 0.9994954 0.9981157
## LA.zGyro    0.9994378 0.9990908 0.9999961
## LA.xMag     0.9854828 0.9992951 0.9256767
## LA.yMag     0.9878061 0.9832347 0.9589753
## LA.zMag     0.9879791 0.8967027 0.9682888
## RL.xAcc     0.9997575 0.9997663 0.9998283
## RL.yAcc     0.9999833 0.9998670 0.9998889
## RL.zAcc     0.9999013 0.9992421 0.9998586
## RL.xGyro    0.9987862 0.9974907 0.9988716
## RL.yGyro    0.9957883 0.9831873 0.9903964
## RL.zGyro    0.9987884 0.9981521 0.9994733
## RL.xMag     0.6956334 0.4795252 0.8053885
## RL.yMag     0.9309121 0.9628129 0.9607081
## RL.zMag     0.9594393 0.9644939 0.9183154
## LL.xAcc     0.9999207 0.9998610 0.9998249
## LL.yAcc     0.9998446 0.9998942 0.9998567
## LL.zAcc     0.9998353 0.9997327 0.9999713
## LL.xGyro    0.9997565 0.9998451 0.9995610
## LL.yGyro    0.9973871 0.9965692 0.9967476
## LL.zGyro    0.9995702 0.9983578 0.9984666
## LL.xMag     0.7529008 0.3230301 0.8027512
## LL.yMag     0.8650810 0.9398143 0.9611489
## LL.zMag     0.9569002 0.9297085 0.9477994
t.res = t(p < 0.05)
t.res
##             jumping stepper walking
## (Intercept)   FALSE   FALSE   FALSE
## T.xAcc        FALSE   FALSE   FALSE
## T.yAcc        FALSE   FALSE   FALSE
## T.zAcc        FALSE   FALSE   FALSE
## T.xGyro       FALSE   FALSE   FALSE
## T.yGyro       FALSE   FALSE   FALSE
## T.zGyro       FALSE   FALSE   FALSE
## T.xMag        FALSE   FALSE   FALSE
## T.yMag        FALSE   FALSE   FALSE
## T.zMag        FALSE   FALSE   FALSE
## RA.xAcc       FALSE   FALSE   FALSE
## RA.yAcc       FALSE   FALSE   FALSE
## RA.zAcc       FALSE   FALSE   FALSE
## RA.xGyro      FALSE   FALSE   FALSE
## RA.yGyro      FALSE   FALSE   FALSE
## RA.zGyro      FALSE   FALSE   FALSE
## RA.xMag       FALSE   FALSE   FALSE
## RA.yMag       FALSE   FALSE   FALSE
## RA.zMag       FALSE   FALSE   FALSE
## LA.xAcc       FALSE   FALSE   FALSE
## LA.yAcc       FALSE   FALSE   FALSE
## LA.zAcc       FALSE   FALSE   FALSE
## LA.xGyro      FALSE   FALSE   FALSE
## LA.yGyro      FALSE   FALSE   FALSE
## LA.zGyro      FALSE   FALSE   FALSE
## LA.xMag       FALSE   FALSE   FALSE
## LA.yMag       FALSE   FALSE   FALSE
## LA.zMag       FALSE   FALSE   FALSE
## RL.xAcc       FALSE   FALSE   FALSE
## RL.yAcc       FALSE   FALSE   FALSE
## RL.zAcc       FALSE   FALSE   FALSE
## RL.xGyro      FALSE   FALSE   FALSE
## RL.yGyro      FALSE   FALSE   FALSE
## RL.zGyro      FALSE   FALSE   FALSE
## RL.xMag       FALSE   FALSE   FALSE
## RL.yMag       FALSE   FALSE   FALSE
## RL.zMag       FALSE   FALSE   FALSE
## LL.xAcc       FALSE   FALSE   FALSE
## LL.yAcc       FALSE   FALSE   FALSE
## LL.zAcc       FALSE   FALSE   FALSE
## LL.xGyro      FALSE   FALSE   FALSE
## LL.yGyro      FALSE   FALSE   FALSE
## LL.zGyro      FALSE   FALSE   FALSE
## LL.xMag       FALSE   FALSE   FALSE
## LL.yMag       FALSE   FALSE   FALSE
## LL.zMag       FALSE   FALSE   FALSE
for (i in 1:ncol(t.res)){
  pass = which(t.res[-1,i])
  points(pass, lz[pass,i], pch = "*", col = xcol[i], cex = 2)
}

# The ratio of the probability of choosing one outcome category over the 
# probability of choosing the baseline category (here "crosstr") is often 
# referred as **relative risk** and it is also sometimes referred as 
# **odds** . The **relative risk** is the linear model exponentiated, 
# leading to the fact that the exponentiated regression coefficients 
# are relative risk ratios for a unit change in the predictor variable.
# So extract the coefficients from the model and exponentiate
t(exp(coef(mod3))) # risk ratios
##                   jumping       stepper      walking
## (Intercept)  7.891414e-53 4.826571e-146 9.947450e-84
## T.xAcc       1.098909e+00  2.562449e+00 2.497286e+00
## T.yAcc       1.341226e+01  2.910676e+00 2.376997e-01
## T.zAcc       2.460353e+00  2.814647e+01 9.878127e+00
## T.xGyro      3.548658e+02  4.079863e+00 6.614565e-03
## T.yGyro      5.172143e+03  1.546671e-03 2.347285e+04
## T.zGyro      2.023273e-03  1.009012e+02 3.938532e-03
## T.xMag      3.237710e+115  6.239077e+86 1.749013e+47
## T.yMag      1.348152e+119  1.000132e+75 2.760602e+27
## T.zMag      2.158000e+120  1.480732e+75 1.613034e+58
## RA.xAcc      4.420763e-01  2.961109e-01 4.483700e-01
## RA.yAcc      3.876588e+00  4.187466e+00 2.925130e-01
## RA.zAcc      6.921683e+00  2.655405e+00 2.568492e+00
## RA.xGyro     1.042661e-01  1.172327e+00 1.514424e-01
## RA.yGyro     9.600282e+03  2.394131e+00 2.596248e+01
## RA.zGyro     3.052914e-02  5.994284e-02 5.245632e-01
## RA.xMag      1.495458e+10  2.981862e+27 9.080750e-05
## RA.yMag      1.603685e-18  7.087459e-03 5.599114e+29
## RA.zMag      3.290747e-07  5.817188e-20 2.118652e+14
## LA.xAcc      6.298390e+00  4.188809e+00 3.700826e-01
## LA.yAcc      1.181843e+00  1.824610e+00 4.665530e-01
## LA.zAcc      7.772483e+00  6.521804e+00 2.022709e+00
## LA.xGyro     3.748691e-03  6.889861e-05 1.963173e-02
## LA.yGyro     2.537020e-03  5.294963e+00 6.247034e-03
## LA.zGyro     9.344910e-02  6.147332e-02 1.017184e+00
## LA.xMag      1.923683e+05  1.963775e+00 2.463627e-42
## LA.yMag      2.013004e-09  2.571655e-15 2.199601e+31
## LA.zMag      1.599208e+08  2.128632e+39 6.714145e-15
## RL.xAcc      3.873962e-01  3.143886e-01 4.921453e-01
## RL.yAcc      1.036469e+00  1.587906e+00 7.614562e-01
## RL.zAcc      6.132532e-01  6.340452e-02 5.402702e-01
## RL.xGyro     3.378318e-04  1.589631e-05 1.016622e-03
## RL.yGyro     7.874991e-04  3.427989e-10 8.108156e-10
## RL.zGyro     2.037349e+01  6.377137e-04 1.891738e-01
## RL.xMag     1.196390e+140 1.521016e+192 3.577415e+95
## RL.yMag      1.179009e-18  2.876962e-27 7.250667e-28
## RL.zMag      5.213903e+27  3.385537e-19 1.803746e+45
## LL.xAcc      1.512855e+00  1.757371e+00 5.203459e-01
## LL.yAcc      6.129572e-01  1.377243e+00 5.882468e-01
## LL.zAcc      2.025806e+00  4.416765e+00 1.175443e+00
## LL.xGyro     3.029704e-01  2.332247e+00 1.296129e+01
## LL.yGyro     1.269550e+01  1.141826e+03 9.550771e+02
## LL.zGyro     3.079647e-01  4.939910e+02 5.325135e+01
## LL.xMag      1.879905e+97 1.317839e+151 4.988291e+84
## LL.yMag      3.519186e+46  6.748719e+35 1.327250e+30
## LL.zMag      3.544002e+25  4.037580e+28 6.877289e+20
# So, the relative risk ratio for a one-unit increase in the variable 
# "T-yGyro" is 0.277 for being in "jumping" vs. "crosstr".

# We can calculate predicted probabilities for each of our outcome levels 
# using the fitted function.
head(pp <- fitted(mod3))
##        crosstr      jumping      stepper walking
## 1 3.241515e-56 3.429236e-10 2.244078e-09       1
## 2 8.008832e-53 3.717713e-15 4.093399e-16       1
## 3 4.063881e-51 2.222220e-22 1.552236e-21       1
## 4 3.160934e-49 7.470229e-26 1.178402e-25       1
## 6 6.217021e-48 9.043649e-33 1.288195e-30       1
## 7 1.683406e-46 2.451412e-35 3.751841e-32       1
# Predict on test
pred3 = predict(mod3, daily.te[,-1])
head(pred3)
## [1] walking walking walking walking walking walking
## Levels: crosstr jumping stepper walking
# Missclassification error on test
mean(pred3 !=  daily.te$id)*100
## [1] 0.00952381
# LASSO Logistic Regression -----------------------------------------------

suppressMessages(require(glmnet, quietly = T)) # l1 penalized-glm 
?glmnet
suppressMessages(require(doMC, quietly = T))
registerDoMC( cores = detectCores() )

# Deviance
mod4 = cv.glmnet(as.matrix(daily.tr[,-1]), daily.tr[,1], family = "multinomial", 
                 type.measure = "deviance",
                 parallel = T)

# Missclassification
mod5 = cv.glmnet(as.matrix(daily.tr[,-1]), daily.tr[,1], family = "multinomial", 
                 type.measure = "class", 
                 parallel = T)

# Let's take a look at the optimal lambda values (pretty close to each other)
log(c("min" = mod4$lambda.min, "1se" = mod4$lambda.1se))
##       min       1se 
## -8.080373 -8.080373
log(c("min" = mod5$lambda.min, "1se" = mod5$lambda.1se))
##       min       1se 
## -5.103293 -5.103293
# Now plot (optimal around 5-7 variables!)
par(mfrow = c(1,2))
plot(mod4)
plot(mod5)

par(mfrow = c(1,1))

# Take a look at the optimal non-zero coeff's under the two 
# (cross-validated) losses.

cc4 <- lapply(coef(mod4), function(x) as.vector(x))
cc5 <- lapply(coef(mod5), function(x) as.vector(x))
cc4.mat <- matrix(unlist(cc4), ncol = length(cc4), byrow = T)
cc5.mat <- matrix(unlist(cc5), ncol = length(cc5), byrow = T)
cc4.mat <- cc4.mat[-1,]
cc5.mat <- cc5.mat[-1,]

# Plot the coeff's matrix & Get the list of active coeff's (per class)
par(mfrow = c(2,2))
act.coeff4 <- list()
act.coeff5 <- list()
for (ix in 1:ncol(cc4.mat)){
  # Get the support
  jnk4 <- cc4.mat[,ix]; jnk4n <- jnk4
  jnk5 <- cc5.mat[,ix]; jnk5n <- jnk5
  idx.act4 = which(  abs(jnk4) > 0 )
  idx.act5 = which(  abs(jnk5) > 0 )
  act.coeff4[[ix]] <- idx.act4
  act.coeff5[[ix]] <- idx.act5
  # Inactivated names
  cf.name4 <- names(daily.tr[,-1]); cf.name4[-idx.act4] <- NA; jnk4n[-idx.act4] <- NA
  cf.name5 <- names(daily.tr[,-1]); cf.name5[-idx.act5] <- NA; jnk5n[-idx.act4] <- NA
  # Plot
  par(mar = c(2,2,1,1))
  # 1 #
  plot(jnk4n, type = "h", ylim = 1.05*range(cc4.mat),
       main = "", xaxt = "n", xlab = "", ylab = "")
  pos.text <- (1*(jnk4 < 0)) + (3*(jnk4 >= 0))
  text(jnk4, cf.name4, cex = .6, pos = pos.text)
  # 2 #
  lines(jnk5n, type = "h", col = rgb(1,0,0,.35), lwd = 4)
  pos.text <- (1*(jnk5 < 0)) + (3*(jnk5 >= 0))
  text(jnk5, cf.name5, cex = .6, pos = pos.text, col = rgb(1,0,0,.6))
  # Annotation
  legend("topleft", c("Deviance", "Miss-classification"), 
         col = c("black", rgb(1,0,0,.35)), lwd = c(1,4),
         cex = .8, bty = "n")
  legend("bottom", paste("Class = ", ix, sep = ""), cex = .8)
  abline(h = 0)
}

par(mfrow = c(1,1))

# Active coeff's (per class)
act.coeff4
## [[1]]
## [1]  2 18 23 27 32 34 35 37 41
## 
## [[2]]
## [1]  4 18 34 36
## 
## [[3]]
## [1]  2  8 11 13 23 31 34 37 38
## 
## [[4]]
## [1]  1  4 10 33 40
act.coeff5
## [[1]]
## [1]  2 23 27 32 35 37
## 
## [[2]]
## [1] 18
## 
## [[3]]
## [1]  2  8 11 13 23 31 34 38
## 
## [[4]]
## [1]  1  4 10 33 40
# Prediction --------------------------------------------------------------

?predict.glmnet
?predict.cv.glmnet

# More statistics
# http://topepo.github.io/caret/index.html
suppressMessages(require(caret, quietly = T))
library(help = caret)
?confusionMatrix
## Help on topic 'confusionMatrix' was found in the following
## packages:
## 
##   Package               Library
##   ModelMetrics          /Library/Frameworks/R.framework/Versions/3.5/Resources/library
##   caret                 /Library/Frameworks/R.framework/Versions/3.5/Resources/library
## 
## 
## Using the first match ...
# OK, now for real using the optimal lambdas
previ4a = predict(mod4, newx = as.matrix(daily.te[,-1]), type = "class", s = mod4$lambda.1se)
previ4b = predict(mod4, newx = as.matrix(daily.te[,-1]), type = "class", s = mod4$lambda.min)
c("Dev-1se" = mean(previ4a != daily.te[,1])*100, "Dev-min" = mean(previ4b != daily.te[,1])*100)
## Dev-1se Dev-min 
##       0       0
previ5a = predict(mod5, newx = as.matrix(daily.te[,-1]), type = "class", s = mod5$lambda.1se)
previ5b = predict(mod5, newx = as.matrix(daily.te[,-1]), type = "class", s = mod5$lambda.min)
c("MC-1se" = mean(previ5a != daily.te[,1])*100, "MC-min" = mean(previ5b != daily.te[,1])*100)
## MC-1se MC-min 
##      0      0
# It seems it doesn't make a big difference...and we're using less than 10 vars!

# knn ---------------------------------------------------------------------

library(help = class)
?class::knn
?class::knn1
?class::knn.cv

start <- Sys.time()
mod6a <- class::knn(daily.tr[,-1], daily.te[,-1], daily.tr[,1], 
                    k = 3, prob = T)
tempo_a <- Sys.time() - start
tempo_a
## Time difference of 17.55356 secs
# Take a look
names(attributes(mod6a))
## [1] "levels" "class"  "prob"
head(attributes(mod6a)$prob) # strongly "opinionated"!
## [1] 1 1 1 1 1 1
head(mod6a)
## [1] walking walking walking walking walking walking
## Levels: crosstr jumping stepper walking
table(mod6a)
## mod6a
## crosstr jumping stepper walking 
##    2650    2624    2599    2627
table(daily.te$id)
## 
## crosstr jumping stepper walking 
##    2625    2625    2625    2625
# Confusion matrix on test
table(mod6a, daily.te$id) # not bad, but we could try to tweak "k"
##          
## mod6a     crosstr jumping stepper walking
##   crosstr    2620       2      28       0
##   jumping       0    2623       1       0
##   stepper       5       0    2594       0
##   walking       0       0       2    2625
# Faster alternative
suppressMessages(require(FNN, quietly = T))
library(help = FNN)
?FNN::knn

start <- Sys.time()
mod6b <- FNN::knn(daily.tr[,-1], daily.te[,-1], daily.tr[,1], 
                  k = 3, prob = T)
tempo_b <- Sys.time() - start
tempo_b    # way faster!
## Time difference of 4.376986 secs
names(attributes(mod6b))
## [1] "levels"   "class"    "prob"     "nn.index" "nn.dist"
head( attributes(mod6b)$nn.dist )
##          [,1]     [,2]     [,3]
## [1,] 2.000269 2.008701 2.461501
## [2,] 2.235429 2.268110 2.395444
## [3,] 4.044905 4.382678 4.585129
## [4,] 3.581235 4.220713 4.220957
## [5,] 3.391058 3.767062 3.803545
## [6,] 1.812719 2.039704 2.042574
table(mod6b)
## mod6b
## crosstr jumping stepper walking 
##    2650    2624    2599    2627
# Confusion matrix on test
table(mod6b, daily.te$id) # ...exactly the same as before
##          
## mod6b     crosstr jumping stepper walking
##   crosstr    2620       2      28       0
##   jumping       0    2623       1       0
##   stepper       5       0    2594       0
##   walking       0       0       2    2625
# Let's do it via <caret>...
# Caret package provides train() method for training our data 
# for various algorithms. We just need to pass different parameter
# values for different algorithms. 
# Before train() method, we will first use trainControl() method. 
# It controls the computational nuances of the train() method.

# In addition, since the dataset is not that small (10k obs),
# let's go parallel to speed up the cross-validation
# To spice things up a bit, let's use only one type of sensors
suppressWarnings(require(doMC, quietly = T))  # parallel backend
registerDoMC(4)                               # register 4 cores

?trainControl
trctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
set.seed(1234) # For reproducibility
knn_fit <- train(id ~ T.xGyro + T.yGyro + T.zGyro,
                 data = daily.tr, method = "knn",
                 trControl  = trctrl,
                 preProcess = c("center", "scale"),
                 tuneLength = 15   # just provide the number, not the values,
                                   # of the tuning parameters to try (here "k",  
                                   # the number of NN)
                 )
# Take a look
knn_fit
## k-Nearest Neighbors 
## 
## 19500 samples
##     3 predictor
##     4 classes: 'crosstr', 'jumping', 'stepper', 'walking' 
## 
## Pre-processing: centered (3), scaled (3) 
## Resampling: Cross-Validated (10 fold, repeated 3 times) 
## Summary of sample sizes: 17551, 17551, 17550, 17548, 17550, 17549, ... 
## Resampling results across tuning parameters:
## 
##   k   Accuracy   Kappa    
##    5  0.5704440  0.4272598
##    7  0.5820517  0.4427374
##    9  0.5876239  0.4501668
##   11  0.5934705  0.4579621
##   13  0.5981196  0.4641610
##   15  0.6001540  0.4668729
##   17  0.6031969  0.4709304
##   19  0.6050771  0.4734373
##   21  0.6054697  0.4739608
##   23  0.6072137  0.4762860
##   25  0.6078462  0.4771297
##   27  0.6092819  0.4790442
##   29  0.6094524  0.4792716
##   31  0.6100165  0.4800236
##   33  0.6099997  0.4800011
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 31.
plot(knn_fit) # not that good now...probably we picked the wrong sensor!

# Prediction is more standard using <caret>
test_pred <- predict(knn_fit, newdata = daily.te)
head(test_pred)
## [1] walking walking walking walking walking walking
## Levels: crosstr jumping stepper walking
# How Accurately our model is working?
confusionMatrix(test_pred, daily.te$id) #...not that accurately...
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction crosstr jumping stepper walking
##    crosstr    1923     153     334      56
##    jumping     197    1373     474     178
##    stepper     295     474     946     288
##    walking     210     625     871    2103
## 
## Overall Statistics
##                                           
##                Accuracy : 0.6043          
##                  95% CI : (0.5949, 0.6137)
##     No Information Rate : 0.25            
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.4724          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
## 
## Statistics by Class:
## 
##                      Class: crosstr Class: jumping Class: stepper
## Sensitivity                  0.7326         0.5230         0.3604
## Specificity                  0.9310         0.8922         0.8658
## Pos Pred Value               0.7798         0.6179         0.4723
## Neg Pred Value               0.9126         0.8488         0.8024
## Prevalence                   0.2500         0.2500         0.2500
## Detection Rate               0.1831         0.1308         0.0901
## Detection Prevalence         0.2349         0.2116         0.1908
## Balanced Accuracy            0.8318         0.7076         0.6131
##                      Class: walking
## Sensitivity                  0.8011
## Specificity                  0.7834
## Pos Pred Value               0.5521
## Neg Pred Value               0.9220
## Prevalence                   0.2500
## Detection Rate               0.2003
## Detection Prevalence         0.3628
## Balanced Accuracy            0.7923
# ...since we are using only 3 covariates, this failure is easy to visualize...
suppressMessages(require(plotly, quietly = T))
p <- plot_ly(daily.te, x = ~T.xGyro, y = ~T.yGyro, z = ~T.zGyro, 
             size = .1,
             color = ~test_pred, 
             colors = RColorBrewer::brewer.pal(nlevels(test_pred), "Set1") ) %>%
  add_markers() %>%
  layout(scene = list(xaxis = list(title = 'T-xGyro'),
                      yaxis = list(title = 'T-yGyro'),
                      zaxis = list(title = 'T-zGyro')))
p